###############################################
# Replication Code for                        #
# "Why Process Matters for Causal Inference"  #
# by Adam N. Glynn and Kevin M. Quinn         #
############################################### 

## get the CPS data and process it
load("CPSvote2004.RData")


VOTECPSnd <- subset(VOTECPS, GESTFIPS != 38)

EDR <- rep(0,dim(VOTECPSnd)[1])
EDR[VOTECPSnd$GESTFIPS == 16] <- 1 # Idaho
EDR[VOTECPSnd$GESTFIPS == 27 ] <- 1 #Maine
EDR[VOTECPSnd$GESTFIPS == 26 ] <- 1 # Minnesota
EDR[VOTECPSnd$GESTFIPS == 33] <- 1 # New Hampshire
EDR[VOTECPSnd$GESTFIPS == 55] <- 1 # Wisconsin
EDR[VOTECPSnd$GESTFIPS == 56] <- 1 # Wyoming

VOTECPSnd <- data.frame(VOTECPSnd,EDR)
VOTECPSnd <- subset(VOTECPSnd,VOTE == 1 | VOTE == 2)
VOTECPSnd <- subset(VOTECPSnd,REG == 1 | REG == 2)

VOTECPSnd$REG[VOTECPSnd$REG ==2] <- 0
VOTECPSnd$VOTE[VOTECPSnd$VOTE==2] <- 0 

## get just blacks
VOTECPSndB <- subset(VOTECPSnd,PTDTRACE == 2)


mydata <- data.frame(vote=VOTECPSndB$VOTE,
                     edr=VOTECPSndB$EDR,
                     famincome=VOTECPSndB$HUFAMINC,
                     age=VOTECPSndB$PRTAGE,
                     sex=VOTECPSndB$PESEX,
                     educ=VOTECPSndB$PEEDUCA,
                     hisp=VOTECPSndB$PEHSPNON)

mydata$age[mydata$age > 80] <- NA ## get rid of values above what the codebook
                                  ## says is the max

mydata$famincome[mydata$famincome < 1 ] <- NA ## get rid of values below what
                                              ## the codebook says is the min

mydata <- na.omit(mydata) ## listwise deletion (useful for matching code)




## estimate propensity scores to use in the matching algorithm
library(mgcv)
gam.out <- gam(edr ~ sex + s(famincome, age, educ, by=as.factor(sex)),
               family=binomial, data=mydata)

mydata$pscore <- predict(gam.out)










library(Matching)

set.seed(72303)

genmatch.out <- GenMatch(Tr=mydata$edr,
                         X=mydata[,c(3,4,5,6,8)],
                         estimand="ATC", pop.size=1000)

match.out <- Match(Y=mydata$vote, Tr=mydata$edr,
                   X=mydata[,c(3,4,5,6,8)],
                   estimand="ATC",
                   Weight.matrix=genmatch.out)

bal.out <- MatchBalance(edr ~ famincome + age + sex + educ + pscore,
                        data=mydata, match.out=match.out, paired=FALSE,
                        nboots=500)


## table 3 in the article
print(summary(match.out))


matched.control <- mydata[genmatch.out$matches[,2],]
matched.control$weights <- genmatch.out$matches[,3]

matched.treated <- mydata[genmatch.out$matches[,1],]
matched.treated$weights <- genmatch.out$matches[,3]


## Figure 1 in the article
pdf("CPS-Balance.pdf", height=11, width=8.5, bg="white")
## before table for famincome
par(mfrow=c(5,2))
fi.pre.treat <- table(mydata$famincome[mydata$edr==1]) / sum(table(mydata$famincome[mydata$edr==1]))

fi.pre.cont <- table(mydata$famincome[mydata$edr==0]) / sum(table(mydata$famincome[mydata$edr==0]))

barplot(rbind(fi.pre.treat, fi.pre.cont), beside=TRUE,
        main="Family Income (Prior to Matching)") 


## after table for famincome
nobs <- sum(matched.treated$weights)
holder <- table(matched.treated$faminc, weights=matched.treated$weights)
fi.post.treat <- rep(NA, 16)
names(fi.post.treat) <- 1:16
for (i in 1:16){
  fi.post.treat[i] <- holder[i,1]*.5 + holder[i,2]
}
fi.post.treat <- fi.post.treat / nobs

holder <- table(matched.control$faminc, weights=matched.control$weights)
fi.post.cont <- rep(NA, 16)
for (i in 1:16){
  fi.post.cont[i] <- holder[i,1]*.5 + holder[i,2]
}
fi.post.cont <- fi.post.cont / nobs

barplot(rbind(fi.post.treat, fi.post.cont), beside=TRUE,
        main="Family Income (After Matching)") 




## before table for educ
educ.pre.treat <- table(mydata$educ[mydata$edr==1]) / sum(table(mydata$educ[mydata$edr==1]))

educ.pre.cont <- table(mydata$educ[mydata$edr==0]) / sum(table(mydata$educ[mydata$edr==0]))

educ.pre.mat <- matrix(NA, 2, 16)
educ.pre.mat[2,] <- educ.pre.cont
colnames(educ.pre.mat) <- names(educ.pre.cont)
educ.pre.mat[1,names(educ.pre.treat)] <- educ.pre.treat

barplot(educ.pre.mat, beside=TRUE,
        main="Education (Prior to Matching)") 


## after table for educ
nobs <- sum(matched.treated$weights)
holder <- table(matched.treated$educ, weights=matched.treated$weights)
educ.post.treat <- rep(NA, 16)
names(educ.post.treat) <- 31:46
for (i in 1:16){
  var <- names(educ.post.treat)[i]
  if (var %in% rownames(holder)){
    educ.post.treat[var] <- holder[var,1]*.5 + holder[var,2]
  }
}
educ.post.treat <- educ.post.treat / nobs


holder <- table(matched.control$educ, weights=matched.control$weights)
educ.post.cont <- rep(NA, 16)
names(educ.post.cont) <- 31:46
for (i in 1:16){
  var <- names(educ.post.cont)[i]
  if (var %in% rownames(holder)){
    educ.post.cont[var] <- holder[var,1]*.5 + holder[var,2]
  }
}
educ.post.cont <- educ.post.cont / nobs

barplot(rbind(educ.post.treat, educ.post.cont), beside=TRUE,
        main="Education (After Matching)") 





## before table for sex
sex.pre.treat <- table(mydata$sex[mydata$edr==1]) / sum(table(mydata$sex[mydata$edr==1]))

sex.pre.cont <- table(mydata$sex[mydata$edr==0]) / sum(table(mydata$sex[mydata$edr==0]))

sex.pre.mat <- matrix(NA, 2, 2)
sex.pre.mat[2,] <- sex.pre.cont
colnames(sex.pre.mat) <- c("Male", "Female")
sex.pre.mat[1,] <- sex.pre.treat

barplot(sex.pre.mat, beside=TRUE,
        main="Sex (Prior to Matching)") 


## after table for sex
nobs <- sum(matched.treated$weights)
holder <- table(matched.treated$sex, weights=matched.treated$weights)
sex.post.treat <- rep(NA, 2)
names(educ.post.treat) <- c("Male", "Female")
for (i in 1:2){
  sex.post.treat[i] <- holder[i,1]*.5 + holder[i,2]
}
sex.post.treat <- sex.post.treat / nobs


holder <- table(matched.control$sex, weights=matched.control$weights)
sex.post.cont <- rep(NA, 2)
names(sex.post.cont) <- c("Male", "Female")
for (i in 1:2){
  sex.post.cont[i] <- holder[i,1]*.5 + holder[i,2]
}
sex.post.cont <- sex.post.cont / nobs

barplot(rbind(sex.post.treat, sex.post.cont), beside=TRUE,
         main="Sex (After Matching)") 






## age before 
age.pre.treat <- mydata$age[mydata$edr==1]
age.pre.cont <- mydata$age[mydata$edr==0]
plot(density(age.pre.cont), lty=2, col="red",
     main="Age (Prior to Matching)", xlab="", ylim=c(0, 0.025), lwd=2)
lines(density(age.pre.treat), lty=1, col="black", lwd=2)


## age after 
age.post.treat <- matched.treated$age
age.post.cont <- matched.control$age
plot(density(age.post.cont,
     weights=(matched.control$weights/nobs)), lty=2, col="red",
     main="Age (After Matching)", xlab="", ylim=c(0, 0.025), lwd=2)
lines(density(age.post.treat, weights=(matched.treated$weights/nobs)),
      lty=1, col="black", lwd=2)




## pscore before 
pscore.pre.treat <- mydata$pscore[mydata$edr==1]
pscore.pre.cont <- mydata$pscore[mydata$edr==0]
plot(density(pscore.pre.cont), lty=2, col="red",
     main="Propensity Score (Prior to Matching)", xlab="", ylim=c(0, 1.75),
     xlim=c(-6,-1), lwd=2)
lines(density(pscore.pre.treat), lty=1, col="black", lwd=2)


## pscore after 
pscore.post.treat <- matched.treated$pscore
pscore.post.cont <- matched.control$pscore
plot(density(pscore.post.cont,
     weights=(matched.control$weights/nobs)), lty=2, col="red",
     main="Propensity Score (After Matching)", xlab="", ylim=c(0, 1.75),
     xlim=c(-6,-1), lwd=2)
lines(density(pscore.post.treat, weights=(matched.treated$weights/nobs)),
      lty=1, col="black", lwd=2)



dev.off()






